Install the following libraries before running the TDA pipeline.
#install.packages(c("TDA", "Matrix"))
library("Matrix") #package for sparse matrices
library("TDA")
working_dir_path <- setwd("~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline")
source("utilities.R")
You only need to run this block once
dir.create("saved_computations", showWarnings = FALSE)
saved_computations <- concat_path(working_dir_path, "saved_computations")
#choose names for two data groups
groups <- list("group_0", "group_25")
for (i in 1:length(groups)){
dir.create(concat_path(saved_computations,groups[i]), showWarnings = FALSE)
}
You only need to run this block once; after running it once comment it out
#max_values <- data.frame(matrix(ncol = 2, nrow = 0))
#colnames(max_values) <- c("maxbirth", "maxdeath")
#write.csv(max_values, concat_path(saved_computations,"max-values.csv"), row.names=FALSE)
#specify location path of the file with the cell type codes and their colors
cell_types_path <- "~/Documents/GitHub/TDA-Microscopy-Pipeline/Identification/cell_colors.csv"
#read all cell type codes from your designated file
cell_types_file <- read.csv(cell_types_path)
all_cell_types <- cell_types_file$Code
# indicate cell types for only including specific cells
cell_types <- all_cell_types[2] # this can either be a single value or a vector
index <- NaN # default index for cell types is the last column
sprintf("Cell types used: %s", paste(cell_types, collapse=", "))
## [1] "Cell types used: 1"
#choose a group folder where to save computations
group <- "group_0"
# specify location path of the csv files of that group
csv_dir_path <- "~/Documents/GitHub/TDA-Microscopy-Pipeline/Discretized-images/0"
We use Delaunay complex filtration to compute persistence diagrams
compute_PDs(csv_dir_path, group, saved_computations, cell_types, index)
## [1] "Processing file 0_dox_1.csv"
## [1] "Processing file 0_dox_10.csv"
## [1] "Processing file 0_dox_11.csv"
## [1] "Processing file 0_dox_12.csv"
## [1] "Processing file 0_dox_13.csv"
## [1] "Processing file 0_dox_14.csv"
## [1] "Processing file 0_dox_15.csv"
## [1] "Processing file 0_dox_16.csv"
## [1] "Processing file 0_dox_2.csv"
## [1] "Processing file 0_dox_3.csv"
## [1] "Processing file 0_dox_4.csv"
## [1] "Processing file 0_dox_5.csv"
## [1] "Processing file 0_dox_6.csv"
## [1] "Processing file 0_dox_7.csv"
## [1] "Processing file 0_dox_8.csv"
## [1] "Processing file 0_dox_9.csv"
## [1] "Persistence diagrams are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0/group_0_PDs.RData"
This is crucial for VI block and will ensure that all plots have the same scale
#Before running this block, compute persistence diagrams for all data groups (i.e choose another data group and specify its location in I-block, and compute its persistence diagrams in II-block)
max_values <- read.csv(concat_path(saved_computations, "max-values.csv"))
max_birth <- max(max_values$maxbirth)
max_death <- max(max_values$maxdeath)
save_plot <- TRUE #set FALSE if you don't want to save plots
plot_PDs(group, max_birth, max_death, saved_computations, save_plot)
## [1] "Persistence diagrams plots are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0"
#choose a persistence threshold
persist_threshold <- 20
save_plot <- TRUE #set FALSE if you don't want to save plots
#plot representative cycles with persistence above the chosen threshold
plot_representative_cycles(csv_dir_path, cell_types, persist_threshold, group, saved_computations, save_plot)
## [1] "Processing file 0_dox_1.csv"
## # Generated complex of size: 231
## [1] "Processing file 0_dox_10.csv"
## # Generated complex of size: 1089
## [1] "Processing file 0_dox_11.csv"
## # Generated complex of size: 1925
## [1] "Processing file 0_dox_12.csv"
## # Generated complex of size: 2901
## [1] "Processing file 0_dox_13.csv"
## # Generated complex of size: 499
## [1] "Processing file 0_dox_14.csv"
## # Generated complex of size: 1263
## [1] "Processing file 0_dox_15.csv"
## # Generated complex of size: 1319
## [1] "Processing file 0_dox_16.csv"
## # Generated complex of size: 2281
## [1] "Processing file 0_dox_2.csv"
## # Generated complex of size: 669
## [1] "Processing file 0_dox_3.csv"
## # Generated complex of size: 1819
## [1] "Processing file 0_dox_4.csv"
## # Generated complex of size: 1771
## [1] "Processing file 0_dox_5.csv"
## # Generated complex of size: 565
## [1] "Processing file 0_dox_6.csv"
## # Generated complex of size: 1387
## [1] "Processing file 0_dox_7.csv"
## # Generated complex of size: 3089
## [1] "Processing file 0_dox_8.csv"
## # Generated complex of size: 2703
## [1] "Processing file 0_dox_9.csv"
## # Generated complex of size: 645
## [1] "Representative cycle are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0"
You need to have persistence diagrams computed first
#choose a discretization step (use the same for all groups)
discr_step <- 0.3
save_computations_csv <- FALSE #set TRUE if additionally to saving persistence landscapes in RData format you want to save each of them in a separate csv file. This option is provided if, for example, you plan to use persistence landscapes for further analysis in another programming language. Note that in a csv file row k corresponds to a persistence landscape function at depth k, where depth 1 corresponds to a outermost persistence landscape function, and column names correspond to x-values.
compute_PLs(group, max_birth, max_death, discr_step, saved_computations, save_computations_csv)
## [1] "Persistence landscapes are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0/group_0_PLs.RData"
save_plot <- TRUE #set FALSE if you don't want to save plots
plot_PLs(group, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Persistence landscape plots are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0/group_0"
You need to have persistence landscapes computed first
compute_avgPL(group, max_birth, max_death, discr_step, saved_computations)
## [1] "Average persistence landscape is saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0/group_0_avgPL.RData"
save_plot <- TRUE #set FALSE if you don't want to save plots
plot_avgPL(group, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Average persistence landscape plot is saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_0"
You need to have computed average persistence landscapes for both groups (make sure that they were plotted on the same scale)
save_plot <- TRUE #set FALSE if you don't want to save plots
#avgPL of groups[0] - avgPL of groups[1]
plot_avgPLs_difference(groups, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Plot of the difference of two average persistence landscapes is saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations"
You need to have computed persistence landscapes for both groups
#number of permutations
num_repeats <- 10000
permutation_test_for_PLs(groups, saved_computations, num_repeats)
## [1] "p-value 0.0007"